
function [dataStructure,modelSpec]=buildDataStructure(dataSetSpec,modelSpec)
%packs together data from different sources
%
% inputs (structures)
% dataSetSpec = settings forwarded to loadFREDmonthly. The fields
%               sourceFile, useStoredList, plotData and interpolateMissing
%               are overwritten below; since useStoredList is forced to
%               false, the caller must supply dataList together with
%               beginSet, endSet, beginPreSample and endPreSample
% modelSpec   = must contain identification; when it equals 'PSVAR' the
%               field selectedInstrument is read as well
%
% outputs (structures)
% dataStructure = output of loadFREDmonthly (data, dates, varname,
%                 varLongName, preSdata, preSdates) after the adjustments
%                 made below
% modelSpec     = input modelSpec; under 'PSVAR' identification the field
%                 instrument is added
%
%miranda 2015 smirandaagrippino@london.edu


%-main endogenous---------------------------------------------------------%

dataSetSpec.sourceFile         ='FRED-MD_2015SM';
% available sourceFile(s)
% FRED-MD_2015m1: 134 monthly series McCracken&Ng(2014) stored lists are:

dataSetSpec.useStoredList      =false; %char with stored list indicated below
% CEE:  EMPL, CPI, PCMDY, FFR, MTWO, TOTR, NBR
%
% BGMs: EMPL, CPI, FFR
%
% BGMm: EMPL, CPI, PCMDY, PINC, CONS, IPROD, CAPU, URATE, HSTART, PPI,
%       PCED, WAGE, FFR, MONE, MTWO, TOTR, NBR, S&P, 10YBY, EXR
%
% FULL: uses all

%data transformations
dataSetSpec.plotData           =false; %plots individual(!) charts
dataSetSpec.interpolateMissing =false;

isPSVAR=strcmp(modelSpec.identification,'PSVAR');


%load base set using St. Louis FRED data
dataStructure=loadFREDmonthly(dataSetSpec);


%-policy variable---------------------------------------------------------%

%replace average rates w\ end of month for psvar identification; the test
%is on the series actually loaded, so a GS1 request that could not be
%matched in the source file is skipped instead of failing on an empty
%column assignment
isOneYearRate =ismember(dataStructure.varname,'GS1');

if isPSVAR && any(isOneYearRate)

    eomFile =load('endOfMonthGS1data');
    eomGS1  =eomFile.eomGS1;

    %map end-of-month observations to first-of-month date stamps
    oneyearDates  =datenum(year(eomGS1.dates),month(eomGS1.dates),1);

    dataStructure.data(:,isOneYearRate)=eomGS1.data(ismember(oneyearDates,dataStructure.dates),:);
    dataStructure.preSdata(:,isOneYearRate)=eomGS1.data(ismember(oneyearDates,dataStructure.preSdates),:);

end



%-remove problematic series-----------------------------------------------%

%remove reserves if sample goes beyond 2007
cutList=ismember(dataStructure.varname,{'TOTRESNS';'NONBORRES'});

if any(cutList) && ~isempty(dataStructure.dates) && dataStructure.dates(end)>datenum(2007,12,1)

    dataStructure.data(:,cutList)=[];
    dataStructure.varname(cutList)=[];
    dataStructure.varLongName(cutList)=[];

    %keep the presample aligned with the main sample: same columns dropped
    if size(dataStructure.preSdata,2)==numel(cutList)
        dataStructure.preSdata(:,cutList)=[];
    end

end


%-load external proxies for shock identification--------------------------%

%load external instruments if PSVAR identification
if isPSVAR

    instrumentName=modelSpec.selectedInstrument;

    if ~isempty(strfind(instrumentName,'MPN'))

        %load narrative Romer&Romer series (updated)
        proxyFile=load('RRnarrative');
        externalInstrument=proxyFile.externalInstrument;

    elseif ~isempty(strfind(instrumentName,'GK'))

        %load surprises in fed fund futures
        proxyFile=load('GSSHFinstruments');
        externalInstrument=proxyFile.externalInstrument;

    elseif ~isempty(strfind(instrumentName,'BLP'))

        %orthogonal to GB and own lags (NEW)
        proxyFile=load('BLPmpshock');
        externalInstrument=proxyFile.BLPmpshock;

    else

        error('buildDataStructure:unknownInstrument',...
            'selectedInstrument ''%s'' does not match any of MPN, GK, BLP.',...
            instrumentName);

    end

    %load in native data structure
    modelSpec.instrument=externalInstrument;

end



%-------------------------------------------------------------------------%

function res=loadFREDmonthly(dataSetSpec)
%
% loads up raw data and applies required transformations
%
% inputs (structure)
% sourceFile         = mat file name (see content below)
% useStoredList      = can be either *false* or a string identifier;
%                      stored list include:
%                      CEE:  EMPL, CPI, PCMDY, FFR, MTWO, TOTR, NBR
%                      BGMs: EMPL, CPI, FFR
%                      BGMm: EMPL, CPI, PCMDY, PINC, CONS, IPROD, CAPU,
%                            URATE, HSTART, PPI, PCED, WAGE, FFR, MONE,
%                            MTWO, TOTR, NBR, S&P, 10YBY, EXR
%                      FULL: uses all
% dataList           = if useStoredList is false, allows to define a new
%                      one, comes in the form of a cell string {'';'';''};
%                      identifiers not found in the source file are
%                      silently dropped
% beginSet           = date, first observation of the main sample
% endSet             = date, last observation of the main sample
% beginPreSample     = date, first observation of the presample
% endPreSample       = date, last observation of the presample
% plotData           = true/false if true plots original and transformed
% interpolateMissing = true/false, fills up missing values with centered MA
%                      does not apply to beginning and end of series
%
% output (structure)
% data        = [Txn] matrix of transformed data
% dates       = [Tx1] vector of reference dates
% varname     = {1xn} cell of data identifiers
% varLongName = {1xn} cell of variables names
% preSdata    = presample block of the transformed data, passed through
%               removeNaNs
% preSdates   = reference dates of the presample
%
% miranda 2015 smirandaagrippino@london.edu


listCEE  = {'PAYEMS';'CPIAUCSL';'PPICMM';'FEDFUNDS';'M2SL';'TOTRESNS';'NONBORRES'};
listBGMs = {'PAYEMS';'CPIAUCSL';'FEDFUNDS'};
listBGMm = {'PAYEMS';'CPIAUCSL';'PPICMM';'W875RX1';'DPCERA3M086SBEA';...
    'INDPRO';'CAPUTLB00004S';'UNRATE';'HOUST';'PPIFGS';'PCEPI';'CES3000000008';...
    'FEDFUNDS';'M1SL';'M2SL';'TOTRESNS';'NONBORRES';'S&P 500';'GS10';'TWEXMMTH'};


%load raw data from file; struct output keeps the file content from
%silently creating or overwriting variables in this workspace
source = load(dataSetSpec.sourceFile);

%sourceFile is in mat format; contains:
% data            = [TxN] matrix of raw data
% dates           = [Tx1] vector of dates
% logTransform    = [1xN] logical for log transformations
% dataName        = {1xN} cell of data identifiers
% dataDescription = {1xN} cell of variables names
data            = source.data;
dates           = source.dates;
logTransform    = source.logTransform;
dataName        = source.dataName;
dataDescription = source.dataDescription;

%detect data list
if ischar(dataSetSpec.useStoredList)
    switch dataSetSpec.useStoredList

        case 'CEE'
            dataList = listCEE;

        case 'BGMs'
            dataList = listBGMs;

        case 'BGMm'
            dataList = listBGMm;

        case 'FULL'
            dataList = dataName;

        otherwise
            error('loadFREDmonthly:unknownStoredList',...
                'useStoredList ''%s'' is not one of CEE, BGMs, BGMm, FULL.',...
                dataSetSpec.useStoredList);
    end

elseif ~dataSetSpec.useStoredList

    dataList=dataSetSpec.dataList;

else

    error('loadFREDmonthly:invalidStoredList',...
        'useStoredList must be false or a stored list identifier (char).');
end

%position of each requested series in the file; unmatched requests dropped
[~,dataSelect]=ismember(dataList,dataName); dataSelect(dataSelect==0)=[];

%trim relevant items
dataRaw         = data(:,dataSelect);
datesRaw        = dates;
dataName        = dataName(dataSelect);
dataDescription = dataDescription(dataSelect);
logTransform    = logical(logTransform(dataSelect));
logTransform    = logTransform(:).'; %row mask, one entry per column

%correct unit for Non-Borrowed Reserves; the mask is built on the trimmed
%dataName (not on dataList) so that it stays aligned with the columns of
%dataRaw even when some requested series were not found in the file
isNonBorrowed = ismember(dataName,'NONBORRES');
dataRaw(:,isNonBorrowed) = dataRaw(:,isNonBorrowed)./1000;


data=dataRaw; dates=datesRaw;

%take logs
hasNegative = any(data<0,1); %explicit dim: also correct for one-row data
logTransform(hasNegative & logTransform)=false; %remove transform if <0
data(:,logTransform)=log(data(:,logTransform))*100;


%fill up NaNs
if dataSetSpec.interpolateMissing

    nanOpt.method  = 2;
    nanOpt.winsize = 1;

    data=removeNaNs(data,nanOpt);

end

%select relevant time span
timeSelect= dates >= dataSetSpec.beginSet & dates <= dataSetSpec.endSet;

dataM  = data(timeSelect,:);
datesM = dates(timeSelect);

%remove ragged edges: every row with a NaN in any series is dropped
incomplete = any(isnan(dataM),2);

dataM  = dataM(~incomplete,:);
datesM = datesM(~incomplete);

%plot data
if dataSetSpec.plotData

    for j=1:size(dataM,2)

        figure;
        subplot(2,1,1)
        plot(datesRaw,dataRaw(:,j)); axis tight; grid on; dateaxis('x',10);
        set(gca,'FontSize',9)
        title([dataDescription{j}, ' all obs'],'FontSize',11,'FontWeight','normal')
        %
        subplot(2,1,2)
        plot(datesM,dataM(:,j)); axis tight; grid on; dateaxis('x',10);
        set(gca,'FontSize',9)
        title('series over selected sample','FontWeight','normal')
        %
        pause; %wait for a key press before moving to the next series
        close(gcf)

    end

end

%-build presample----------------------------------------------------------
timeSelect= dates >= dataSetSpec.beginPreSample & dates <= dataSetSpec.endPreSample;

preSdates = dates(timeSelect);
preSdata  = data(timeSelect,:);


%only for initialization
nanOpt.method  = 1;
nanOpt.winsize = 1;

preSdata=removeNaNs(preSdata,nanOpt);


%load output
res.data        = dataM;
res.dates       = datesM;
res.varname     = dataName;
res.varLongName = dataDescription;
res.preSdata    = preSdata;
res.preSdates   = preSdates;
